library(grf)
library(foreign)
library(haven)
library(rio)
library(party)
library(mltools)
library(mlr)
library(data.table)
library(dplyr)
library(labelled)
library(caTools)
library(xgboost)
library(caret)
library(tree)
library(datasets)
require(haven)
library(Matrix)
library(gtools)
library(binsreg)

# Remove every (non-hidden) object from the current environment before
# running. Attached packages and options are not affected by this call.
rm(list = ls())


### Untreated ###

# `train` supplies the rows the models are fitted on further down;
# `full` supplies the rows the fitted models are applied to.
train <- rio::import(file = "I:/Workdata/706727/Build/Data/MLOutPred_Train.dta")
full <- rio::import(file = "I:/Workdata/706727/Build/Data/MLOutPred_Full.dta")

set.seed(112)

# Empty container that later receives the fitted boosters.
models <- vector(mode = "list", length = 0L)

# The eight outcome columns, in the order they are modelled and exported.
Yvars <- train %>%
  select(firstpsyc, psyc, docvisit, specdoc, medphyc, inpat, outpat, er)

# ---- Tune, fit and predict: one model per outcome and event-time period ----
#
# For every outcome column in `Yvars`:
#   1. tune the tree hyperparameters by random search on the rows of `train`
#      with distyrm <= -1;
#   2. for every value on the distyrm grid, pick the number of boosting
#      rounds by 5-fold CV on that period's rows of `train`, fit an xgboost
#      model on them and predict for the same period's rows of `full`.
# The predictions are collected per outcome and bound together once at the
# end into `outdat` (pnr, dupli, distyrm + one prediction column per
# outcome), which is exported as a Stata file.
#
# The order of all random-number-consuming calls (set.seed, tuneParams,
# xgb.cv, xgb.train) is deliberately kept fixed: outcomes in the outer loop,
# periods in the inner loop.

outcome_names <- names(Yvars)
n_outcomes <- length(outcome_names)

# distyrm grid: -5, -4.75, ..., 7 (49 values). Multiples of 0.25 are exactly
# representable in binary floating point, so the `==` matches below are exact.
period_values <- -5 + (seq_len(49L) - 1) * 0.25
n_periods <- length(period_values)

# Everything except the person id and the outcomes themselves is a predictor.
non_feature_cols <- c("pnr", outcome_names)
feature_cols <- setdiff(names(train), non_feature_cols)
full_feature_cols <- setdiff(names(full), non_feature_cols)
if (!identical(feature_cols, full_feature_cols)) {
  # The models are fitted on train's columns and applied to full's columns,
  # so the two sets have to line up; surface a mismatch before the long run.
  warning(
    "Predictor columns differ between `train` and `full`: ",
    paste(
      union(
        setdiff(feature_cols, full_feature_cols),
        setdiff(full_feature_cols, feature_cols)
      ),
      collapse = ", "
    ),
    call. = FALSE,
    immediate. = TRUE
  )
}

# Row positions of each period, looked up once instead of once per outcome.
train_period_rows <- lapply(
  period_values,
  function(y) which(train$distyrm == y)
)
full_period_rows <- lapply(
  period_values,
  function(y) which(full$distyrm == y)
)

# Pre-period design used for hyperparameter tuning. It does not depend on the
# outcome, so it is built once; only the target column `Y` changes per outcome.
# The matrix round trip mirrors how the per-period matrices are built.
pre_period <- train$distyrm <= -1
tune_data <- as.data.frame(
  as.matrix(train[pre_period, feature_cols, drop = FALSE])
)

# Learner, search space, resampling scheme and search budget used for tuning.
lrn <- makeLearner("regr.xgboost")
# NOTE(review): eta = 1.5 lies above the [0, 1] range that the xgboost
# parameter documentation gives for the learning rate, and differs from the
# eta = 0.1 used for the final fits below - confirm this is intentional.
lrn$par.vals <- list(
  objective = "reg:squarederror",
  eval_metric = "rmse",
  nthread = 16,
  early_stopping_rounds = 5,
  nrounds = 20L,
  eta = 1.5
)

gridparams <- makeParamSet(
  makeIntegerParam("max_depth", lower = 1L, upper = 10L),
  makeNumericParam("subsample", lower = 0.25, upper = 1),
  makeNumericParam("colsample_bytree", lower = 0.25, upper = 1),
  makeNumericParam("gamma", lower = 0, upper = 10)
)

rdesc <- makeResampleDesc("CV", iters = 2L)
ctrl <- makeTuneControlRandom(maxit = 10L)

# Fixed settings of the final per-period fits; tuned values are appended.
base_xgb_params <- list(
  booster = "gbtree",
  eta = 0.1,
  nthread = 16,
  objective = "reg:squarederror",
  eval_metric = "rmse"
)

# One numeric vector of stacked per-period predictions per outcome.
predictions <- vector("list", n_outcomes)

for (i in seq_len(n_outcomes)) {

  # Find hyperparameters based on pre-period #
  set.seed(112)

  tune_data$Y <- Yvars[pre_period, i]
  traintask <- makeRegrTask(data = tune_data, target = "Y")

  mytune <- tuneParams(
    learner = lrn, task = traintask, resampling = rdesc, measures = rmse,
    par.set = gridparams, control = ctrl, show.info = TRUE
  )

  xgb_params <- c(base_xgb_params, mytune$x)

  period_pred <- vector("list", n_periods)

  for (m in seq_len(n_periods)) {

    rows_train <- train_period_rows[[m]]
    rows_full <- full_period_rows[[m]]

    X <- as.matrix(train[rows_train, feature_cols, drop = FALSE])
    Xfull <- as.matrix(full[rows_full, full_feature_cols, drop = FALSE])

    Y <- Yvars[rows_train, i]

    xgb_train <- xgb.DMatrix(data = X, label = Y)

    # Number of rounds #
    xgbcv <- xgb.cv(
      params = xgb_params,
      data = xgb_train,
      nrounds = 500,
      early_stopping_rounds = 5,
      nfold = 5
    )
    numrounds <- xgbcv$best_iteration

    # NOTE(review): `models` is indexed by period only, so each outcome
    # overwrites the boosters of the previous one; after the loop it holds
    # the models of the last outcome. Kept as in the original.
    models[[m]] <- xgb.train(
      params = xgb_params, data = xgb_train, nrounds = numrounds, verbose = 1
    )

    #### Predict sample ###
    period_pred[[m]] <- predict(models[[m]], Xfull, reshape = TRUE)
  }

  # Stack the periods in grid order; the identifier rows below use the same
  # order, so predictions and identifiers stay aligned row by row.
  predictions[[i]] <- unlist(period_pred, use.names = FALSE)
}

# Identifier rows of `full`, ordered period by period like the predictions.
ordered_full_rows <- unlist(full_period_rows, use.names = FALSE)
outdat <- cbind(
  full[ordered_full_rows, c("pnr", "dupli", "distyrm")],
  do.call(cbind, predictions)
)
colnames(outdat) <- c("pnr", "dupli", "distyrm", outcome_names)

rio::export(outdat, "I:/Workdata/706727/Build/Data/MLOutPred.dta")

